'''
* This is the projet for Brtc LlmOps Platform
* @Author Leon-liao <liaosiliang@alltman.com>
* @Description //TODO 
* @File: semantic_retriever.py
* @Time: 2025/11/14
* @All Rights Reserve By Brtc
'''
from uuid import UUID

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document as LCDocument

from langchain_core.retrievers import BaseRetriever
from langchain_weaviate import WeaviateVectorStore
from pydantic import Field
from weaviate.collections.classes.filters import Filter


class SemanticRetriever(BaseRetriever):
    """相似性检索器/向量检索器"""
    dataset_ids:list[UUID]
    vector_store:WeaviateVectorStore
    search_kwargs:dict = Field(default_factory=dict)

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> list[LCDocument]:
        """根据传递的query 执行相似性检索"""
        #1、提取最大的搜索条件K默认为4
        k = self.search_kwargs.pop("k",4)
        #2、执行相似性检索，并获取的分信息
        search_result = self.vector_store.similarity_search_with_relevance_scores(
            query=query,
            k=k,
            **{
                "filters":Filter.all_of([
                    Filter.by_property("dataset_id").contains_any([str(dataset_id) for dataset_id in self.dataset_ids]),
                    Filter.by_property("document_enabled").equal(True),
                    Filter.by_property("segment_enabled").equal(True)
                ]),
                **self.search_kwargs,
            }
        )

        if search_result is None or len(search_result) == 0:
            return []
        lc_documents, scores = zip(*search_result)
        for lc_document, score in zip(lc_documents, scores):
            lc_document.metadata["score"] = score


        return list(lc_documents)
